1 Introduction

This pipeline performs reproducible microbiome sequence analysis using DADA2 for sequence processing and phyloseq for data management and visualization. The pipeline supports both bacterial and fungal amplicon sequencing data.

2 Load Libraries

# Load required packages
library(dada2)
library(phyloseq)
library(ggplot2)
library(yaml)
library(dplyr)
library(tidyr)

# Set seed for reproducibility
set.seed(123)

# Load configuration
config <- yaml::read_yaml("config.yaml")
analysis_type <- config$analysis_type

paste("Analysis type:", analysis_type, "\n")
## [1] "Analysis type: bacteria \n"

3 Setup and Data Paths

# Define paths
path <- "data/raw"  # Path to raw fastq files
output_path <- "output"
fig_path <- file.path(output_path, "figures")
table_path <- file.path(output_path, "tables")

# Create output directories if they don't exist
dir.create(fig_path, showWarnings = FALSE, recursive = TRUE)
dir.create(table_path, showWarnings = FALSE, recursive = TRUE)

# List files
fnFs <- sort(list.files(path, pattern = "_R1_001.fastq", full.names = TRUE))
fnRs <- sort(list.files(path, pattern = "_R2_001.fastq", full.names = TRUE))

# Extract sample names
sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1) # this is the hardest custom line for a new programmer (or at least it was for me when I was learning)

print(paste0("Found", length(fnFs), "forward reads and", length(fnRs), "reverse reads\n"))
## [1] "Found19forward reads and19reverse reads\n"
print(paste0("Sample names:", paste(sample.names, collapse = ", "), "\n"))
## [1] "Sample names:F3D0, F3D1, F3D141, F3D142, F3D143, F3D144, F3D145, F3D146, F3D147, F3D148, F3D149, F3D150, F3D2, F3D3, F3D5, F3D6, F3D7, F3D8, F3D9\n"

4 Quality Control

4.1 Visualize Quality Profiles

if (length(fnFs) > 0) {
  # Plot quality profiles for forward reads
  plotQualityProfile(fnFs[1:min(2, length(fnFs))]) + 
    ggtitle("Forward Read Quality Profile")
  
  # Plot quality profiles for reverse reads
  plotQualityProfile(fnRs[1:min(2, length(fnRs))]) + 
    ggtitle("Reverse Read Quality Profile")
}

4.2 Filter and Trim

# Place filtered files in filtered/ subdirectory
filtFs <- file.path(path, "filtered", paste0(sample.names, "_F_filt.fastq.gz"))
filtRs <- file.path(path, "filtered", paste0(sample.names, "_R_filt.fastq.gz"))
names(filtFs) <- sample.names
names(filtRs) <- sample.names

# Filter and trim
if (length(fnFs) > 0) {
  out <- filterAndTrim(
    fnFs, filtFs, fnRs, filtRs,
    truncLen = config$dada2$filter$truncLen,
    maxN = config$dada2$filter$maxN,
    maxEE = config$dada2$filter$maxEE,
    truncQ = config$dada2$filter$truncQ,
    rm.phix = config$dada2$filter$rm.phix,
    compress = config$dada2$filter$compress,
    multithread = TRUE
  )
  
  print(head(out))
  
  # Save filtering statistics
  if (config$output$save_intermediate) {
    write.csv(out, file.path(table_path, "filtering_stats.csv"))
  }
}
##                               reads.in reads.out
## F3D0_S188_L001_R1_001.fastq       7793      7113
## F3D1_S189_L001_R1_001.fastq       5869      5299
## F3D141_S207_L001_R1_001.fastq     5958      5463
## F3D142_S208_L001_R1_001.fastq     3183      2914
## F3D143_S209_L001_R1_001.fastq     3178      2941
## F3D144_S210_L001_R1_001.fastq     4827      4312

5 Learn Error Rates

if (length(fnFs) > 0) {
  # Learn forward read errors
  errF <- learnErrors(filtFs, 
                      nbases = config$dada2$learn_errors$nbases,
                      multithread = TRUE)
  
  # Learn reverse read errors
  errR <- learnErrors(filtRs, 
                      nbases = config$dada2$learn_errors$nbases,
                      multithread = TRUE)
  
  # Plot error rates
  plotErrors(errF, nominalQ = TRUE) +
    ggtitle("Forward Read Error Rates")
  
  plotErrors(errR, nominalQ = TRUE) +
    ggtitle("Reverse Read Error Rates")
}
## 2978880 total bases in 12412 reads from 2 samples will be used for learning the error rates.
## 2860000 total bases in 17875 reads from 3 samples will be used for learning the error rates.

6 Sample Inference

if (length(fnFs) > 0) {
  # Apply core DADA2 sample inference algorithm
  dadaFs <- dada(filtFs, err = errF, multithread = TRUE)
  dadaRs <- dada(filtRs, err = errR, multithread = TRUE)
  
  # Inspect the returned dada-class object
  cat("DADA2 inference on first sample:\n")
  print(dadaFs[[1]])
}
## Sample 1 - 7113 reads in 1979 unique sequences.
## Sample 2 - 5299 reads in 1639 unique sequences.
## Sample 3 - 5463 reads in 1477 unique sequences.
## Sample 4 - 2914 reads in 904 unique sequences.
## Sample 5 - 2941 reads in 939 unique sequences.
## Sample 6 - 4312 reads in 1267 unique sequences.
## Sample 7 - 6741 reads in 1756 unique sequences.
## Sample 8 - 4560 reads in 1438 unique sequences.
## Sample 9 - 15637 reads in 3590 unique sequences.
## Sample 10 - 11413 reads in 2762 unique sequences.
## Sample 11 - 12017 reads in 3021 unique sequences.
## Sample 12 - 5032 reads in 1566 unique sequences.
## Sample 13 - 18075 reads in 3707 unique sequences.
## Sample 14 - 6250 reads in 1479 unique sequences.
## Sample 15 - 4052 reads in 1195 unique sequences.
## Sample 16 - 7369 reads in 1832 unique sequences.
## Sample 17 - 4765 reads in 1183 unique sequences.
## Sample 18 - 4871 reads in 1382 unique sequences.
## Sample 19 - 6504 reads in 1709 unique sequences.
## Sample 1 - 7113 reads in 1660 unique sequences.
## Sample 2 - 5299 reads in 1349 unique sequences.
## Sample 3 - 5463 reads in 1335 unique sequences.
## Sample 4 - 2914 reads in 853 unique sequences.
## Sample 5 - 2941 reads in 880 unique sequences.
## Sample 6 - 4312 reads in 1286 unique sequences.
## Sample 7 - 6741 reads in 1803 unique sequences.
## Sample 8 - 4560 reads in 1265 unique sequences.
## Sample 9 - 15637 reads in 3414 unique sequences.
## Sample 10 - 11413 reads in 2522 unique sequences.
## Sample 11 - 12017 reads in 2771 unique sequences.
## Sample 12 - 5032 reads in 1415 unique sequences.
## Sample 13 - 18075 reads in 3290 unique sequences.
## Sample 14 - 6250 reads in 1390 unique sequences.
## Sample 15 - 4052 reads in 1134 unique sequences.
## Sample 16 - 7369 reads in 1635 unique sequences.
## Sample 17 - 4765 reads in 1084 unique sequences.
## Sample 18 - 4871 reads in 1161 unique sequences.
## Sample 19 - 6504 reads in 1502 unique sequences.
## DADA2 inference on first sample:
## dada-class: object describing DADA2 denoising results
## 128 sequence variants were inferred from 1979 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16

7 Merge Paired Reads

if (length(fnFs) > 0) {
  # Merge paired end reads
  mergers <- mergePairs(
    dadaFs, filtFs,
    dadaRs, filtRs,
    minOverlap = config$dada2$merge$minOverlap,
    maxMismatch = config$dada2$merge$maxMismatch,
    verbose = TRUE
  )
  
  # Inspect the merger data.frame from the first sample
  cat("Merged reads for first sample:\n")
  print(head(mergers[[1]]))
}
## Merged reads for first sample:
##                                                                                                                                                                                                                                                       sequence
## 1 TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGAAGATCAAGTCAGCGGTAAAATTGAGAGGCTCAACCTCTTCGAGCCGTTGAAACTGGTTTTCTTGAGTGAGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCTCAACTGACGCTCATGCACGAAAGTGTGGGTATCGAACAGG
## 2 TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGCCTGCCAAGTCAGCGGTAAAATTGCGGGGCTCAACCCCGTACAGCCGTTGAAACTGCCGGGCTCGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCATACCGGCGCCCTACTGACGCTGAGGCACGAAAGTGCGGGGATCAAACAGG
## 3 TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCTGTTAAGTCAGCGGTCAAATGTCGGGGCTCAACCCCGGCCTGCCGTTGAAACTGGCGGCCTCGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCCCGACTGACGCTGAGGCACGAAAGCGTGGGTATCGAACAGG
## 4 TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCTTTTAAGTCAGCGGTAAAAATTCGGGGCTCAACCCCGTCCGGCCGTTGAAACTGGGGGCCTTGAGTGGGCGAGAAGAAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCCTTCCGGCGCCCTACTGACGCTGAGGCACGAAAGTGCGGGGATCGAACAGG
## 5 TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGACTCTCAAGTCAGCGGTCAAATCGCGGGGCTCAACCCCGTTCCGCCGTTGAAACTGGGAGCCTTGAGTGCGCGAGAAGTAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCCTACCGGCGCGCAACTGACGCTCATGCACGAAAGCGTGGGTATCGAACAGG
## 6 TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGATGCCAAGTCAGCGGTAAAAAAGCGGTGCTCAACGCCGTCGAGCCGTTGAAACTGGCGTTCTTGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCCCTACTGACGCTGAGGCACGAAAGCGTGGGTATCGAACAGG
##   abundance forward reverse nmatch nmismatch nindel prefer accept
## 1       579       1       1    148         0      0      1   TRUE
## 2       470       2       2    148         0      0      2   TRUE
## 3       449       3       4    148         0      0      1   TRUE
## 4       430       4       3    148         0      0      2   TRUE
## 5       345       5       6    148         0      0      1   TRUE
## 6       282       6       5    148         0      0      2   TRUE

8 Construct Sequence Table

if (length(fnFs) > 0) {
  # Construct sequence table
  seqtab <- makeSequenceTable(mergers)
  
  cat("Sequence table dimensions:", dim(seqtab), "\n")
  cat("Sequence length distribution:\n")
  print(table(nchar(getSequences(seqtab))))
}
## Sequence table dimensions: 19 277 
## Sequence length distribution:
## 
## 251 252 253 254 255 
##   1  85 184   5   2

9 Remove Chimeras

if (length(fnFs) > 0) {
  # Remove chimeric sequences
  seqtab.nochim <- removeBimeraDenovo(
    seqtab,
    method = config$dada2$chimera$method,
    multithread = TRUE,
    verbose = TRUE
  )
  
  cat("Dimensions after chimera removal:", dim(seqtab.nochim), "\n")
  cat("Proportion of non-chimeric sequences:", 
      sum(seqtab.nochim) / sum(seqtab), "\n")
}
## Dimensions after chimera removal: 19 217 
## Proportion of non-chimeric sequences: 0.9626902

10 Track Reads Through Pipeline

if (length(fnFs) > 0) {
  # Create function to get number of unique sequences
  getN <- function(x) sum(getUniques(x))
  
  # Build tracking table
  track <- cbind(
    out,
    sapply(dadaFs, getN),
    sapply(dadaRs, getN),
    sapply(mergers, getN),
    rowSums(seqtab.nochim)
  )
  
  colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", 
                       "merged", "nonchim")
  rownames(track) <- sample.names
  
  print(track)
  
  # Save tracking table
  write.csv(track, file.path(table_path, "read_tracking.csv"))
  
  # Visualize read tracking
  track_df <- as.data.frame(track)
  track_df$sample <- rownames(track_df)
  track_long <- tidyr::pivot_longer(
    track_df,
    cols = -sample,
    names_to = "step",
    values_to = "reads"
  )
  track_long$step <- factor(
    track_long$step,
    levels = c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim")
  )
  
  ggplot(track_long, aes(x = step, y = reads, group = sample, color = sample)) +
    geom_line() +
    geom_point() +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    labs(title = "Read Tracking Through Pipeline",
         x = "Pipeline Step",
         y = "Number of Reads")
  
  ggsave(file.path(fig_path, "read_tracking.png"), 
         width = 10, height = 6, dpi = 300)
}
##        input filtered denoisedF denoisedR merged nonchim
## F3D0    7793     7113      6999      7004   6577    6565
## F3D1    5869     5299      5227      5240   5024    5013
## F3D141  5958     5463      5339      5344   4960    4837
## F3D142  3183     2914      2796      2833   2597    2523
## F3D143  3178     2941      2823      2862   2553    2519
## F3D144  4827     4312      4175      4225   3625    3486
## F3D145  7377     6741      6594      6623   6072    5813
## F3D146  5021     4560      4448      4467   3936    3848
## F3D147 17070    15637     15440     15513  14051   12823
## F3D148 12405    11413     11253     11211  10359    9763
## F3D149 13083    12017     11867     11909  11177   10661
## F3D150  5509     5032      4864      4930   4313    4231
## F3D2   19620    18075     17905     17916  17317   16725
## F3D3    6758     6250      6147      6180   5854    5489
## F3D5    4448     4052      3948      3983   3737    3737
## F3D6    7989     7369      7237      7295   6857    6670
## F3D7    5129     4765      4651      4687   4438    4220
## F3D8    5294     4871      4781      4776   4554    4525
## F3D9    7070     6504      6342      6442   6095    6018

11 Assign Taxonomy

if (length(fnFs) > 0) {
  if (analysis_type == "bacteria") {
    cat("Assigning bacterial taxonomy using", 
        config$taxonomy$bacteria$database, "\n")
    
    # Check if database files exist
    db_path <- config$taxonomy$bacteria$silva_train_set
    if (file.exists(db_path)) {
      taxa <- assignTaxonomy(
        seqtab.nochim,
        db_path,
        multithread = TRUE
      )
      
      # Add species-level annotation if available
      species_db <- config$taxonomy$bacteria$silva_species
      if (file.exists(species_db)) {
        taxa <- addSpecies(taxa, species_db)
      }
    } else {
      cat("Warning: Database file not found at", db_path, "\n")
      cat("Creating placeholder taxonomy table\n")
      taxa <- matrix(
        "Unassigned",
        nrow = nrow(seqtab.nochim),
        ncol = 7
      )
      colnames(taxa) <- c("Kingdom", "Phylum", "Class", "Order", 
                         "Family", "Genus", "Species")
    }
    
  } else if (analysis_type == "fungi") {
    cat("Assigning fungal taxonomy using", 
        config$taxonomy$fungi$database, "\n")
    
    # Check if database files exist
    db_path <- config$taxonomy$fungi$unite_train_set
    if (file.exists(db_path)) {
      taxa <- assignTaxonomy(
        seqtab.nochim,
        db_path,
        multithread = TRUE
      )
    } else {
      cat("Warning: Database file not found at", db_path, "\n")
      cat("Creating placeholder taxonomy table\n")
      taxa <- matrix(
        "Unassigned",
        nrow = nrow(seqtab.nochim),
        ncol = 7
      )
      colnames(taxa) <- c("Kingdom", "Phylum", "Class", "Order", 
                         "Family", "Genus", "Species")
    }
  }
  
  # Inspect taxonomic assignments
  cat("\nTaxonomic assignments (first few):\n")
  print(head(taxa))
  
  # Save taxonomy table
  if (config$output$save_intermediate) {
    saveRDS(taxa, file.path(output_path, "taxonomy.rds"))
  }
}
## Assigning bacterial taxonomy using silva 
## 
## Taxonomic assignments (first few):
##                                                                                                                                                                                                                                                               Kingdom   
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGAAGATCAAGTCAGCGGTAAAATTGAGAGGCTCAACCTCTTCGAGCCGTTGAAACTGGTTTTCTTGAGTGAGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCTCAACTGACGCTCATGCACGAAAGTGTGGGTATCGAACAGG  "Bacteria"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGACTCTCAAGTCAGCGGTCAAATCGCGGGGCTCAACCCCGTTCCGCCGTTGAAACTGGGAGCCTTGAGTGCGCGAGAAGTAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCCTACCGGCGCGCAACTGACGCTCATGCACGAAAGCGTGGGTATCGAACAGG  "Bacteria"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCTGTTAAGTCAGCGGTCAAATGTCGGGGCTCAACCCCGGCCTGCCGTTGAAACTGGCGGCCTCGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCCCGACTGACGCTGAGGCACGAAAGCGTGGGTATCGAACAGG  "Bacteria"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCTTTTAAGTCAGCGGTAAAAATTCGGGGCTCAACCCCGTCCGGCCGTTGAAACTGGGGGCCTTGAGTGGGCGAGAAGAAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCCTTCCGGCGCCCTACTGACGCTGAGGCACGAAAGTGCGGGGATCGAACAGG  "Bacteria"
## TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGACTGCAACTGACACTGATGCTCGAAAGTGTGGGTATCAAACAGG "Bacteria"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGCCTGCCAAGTCAGCGGTAAAATTGCGGGGCTCAACCCCGTACAGCCGTTGAAACTGCCGGGCTCGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCATACCGGCGCCCTACTGACGCTGAGGCACGAAAGTGCGGGGATCAAACAGG  "Bacteria"
##                                                                                                                                                                                                                                                               Phylum        
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGAAGATCAAGTCAGCGGTAAAATTGAGAGGCTCAACCTCTTCGAGCCGTTGAAACTGGTTTTCTTGAGTGAGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCTCAACTGACGCTCATGCACGAAAGTGTGGGTATCGAACAGG  "Bacteroidota"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGACTCTCAAGTCAGCGGTCAAATCGCGGGGCTCAACCCCGTTCCGCCGTTGAAACTGGGAGCCTTGAGTGCGCGAGAAGTAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCCTACCGGCGCGCAACTGACGCTCATGCACGAAAGCGTGGGTATCGAACAGG  "Bacteroidota"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCTGTTAAGTCAGCGGTCAAATGTCGGGGCTCAACCCCGGCCTGCCGTTGAAACTGGCGGCCTCGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCCCGACTGACGCTGAGGCACGAAAGCGTGGGTATCGAACAGG  "Bacteroidota"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCTTTTAAGTCAGCGGTAAAAATTCGGGGCTCAACCCCGTCCGGCCGTTGAAACTGGGGGCCTTGAGTGGGCGAGAAGAAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCCTTCCGGCGCCCTACTGACGCTGAGGCACGAAAGTGCGGGGATCGAACAGG  "Bacteroidota"
## TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGACTGCAACTGACACTGATGCTCGAAAGTGTGGGTATCAAACAGG "Bacteroidota"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGCCTGCCAAGTCAGCGGTAAAATTGCGGGGCTCAACCCCGTACAGCCGTTGAAACTGCCGGGCTCGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCATACCGGCGCCCTACTGACGCTGAGGCACGAAAGTGCGGGGATCAAACAGG  "Bacteroidota"
##                                                                                                                                                                                                                                                               Class        
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGAAGATCAAGTCAGCGGTAAAATTGAGAGGCTCAACCTCTTCGAGCCGTTGAAACTGGTTTTCTTGAGTGAGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCTCAACTGACGCTCATGCACGAAAGTGTGGGTATCGAACAGG  "Bacteroidia"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGACTCTCAAGTCAGCGGTCAAATCGCGGGGCTCAACCCCGTTCCGCCGTTGAAACTGGGAGCCTTGAGTGCGCGAGAAGTAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCCTACCGGCGCGCAACTGACGCTCATGCACGAAAGCGTGGGTATCGAACAGG  "Bacteroidia"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCTGTTAAGTCAGCGGTCAAATGTCGGGGCTCAACCCCGGCCTGCCGTTGAAACTGGCGGCCTCGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCCCGACTGACGCTGAGGCACGAAAGCGTGGGTATCGAACAGG  "Bacteroidia"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCTTTTAAGTCAGCGGTAAAAATTCGGGGCTCAACCCCGTCCGGCCGTTGAAACTGGGGGCCTTGAGTGGGCGAGAAGAAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCCTTCCGGCGCCCTACTGACGCTGAGGCACGAAAGTGCGGGGATCGAACAGG  "Bacteroidia"
## TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGACTGCAACTGACACTGATGCTCGAAAGTGTGGGTATCAAACAGG "Bacteroidia"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGCCTGCCAAGTCAGCGGTAAAATTGCGGGGCTCAACCCCGTACAGCCGTTGAAACTGCCGGGCTCGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCATACCGGCGCCCTACTGACGCTGAGGCACGAAAGTGCGGGGATCAAACAGG  "Bacteroidia"
##                                                                                                                                                                                                                                                               Order          
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGAAGATCAAGTCAGCGGTAAAATTGAGAGGCTCAACCTCTTCGAGCCGTTGAAACTGGTTTTCTTGAGTGAGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCTCAACTGACGCTCATGCACGAAAGTGTGGGTATCGAACAGG  "Bacteroidales"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGACTCTCAAGTCAGCGGTCAAATCGCGGGGCTCAACCCCGTTCCGCCGTTGAAACTGGGAGCCTTGAGTGCGCGAGAAGTAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCCTACCGGCGCGCAACTGACGCTCATGCACGAAAGCGTGGGTATCGAACAGG  "Bacteroidales"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCTGTTAAGTCAGCGGTCAAATGTCGGGGCTCAACCCCGGCCTGCCGTTGAAACTGGCGGCCTCGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCCCGACTGACGCTGAGGCACGAAAGCGTGGGTATCGAACAGG  "Bacteroidales"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCTTTTAAGTCAGCGGTAAAAATTCGGGGCTCAACCCCGTCCGGCCGTTGAAACTGGGGGCCTTGAGTGGGCGAGAAGAAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCCTTCCGGCGCCCTACTGACGCTGAGGCACGAAAGTGCGGGGATCGAACAGG  "Bacteroidales"
## TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGACTGCAACTGACACTGATGCTCGAAAGTGTGGGTATCAAACAGG "Bacteroidales"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGCCTGCCAAGTCAGCGGTAAAATTGCGGGGCTCAACCCCGTACAGCCGTTGAAACTGCCGGGCTCGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCATACCGGCGCCCTACTGACGCTGAGGCACGAAAGTGCGGGGATCAAACAGG  "Bacteroidales"
##                                                                                                                                                                                                                                                               Family          
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGAAGATCAAGTCAGCGGTAAAATTGAGAGGCTCAACCTCTTCGAGCCGTTGAAACTGGTTTTCTTGAGTGAGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCTCAACTGACGCTCATGCACGAAAGTGTGGGTATCGAACAGG  "Muribaculaceae"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGACTCTCAAGTCAGCGGTCAAATCGCGGGGCTCAACCCCGTTCCGCCGTTGAAACTGGGAGCCTTGAGTGCGCGAGAAGTAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCCTACCGGCGCGCAACTGACGCTCATGCACGAAAGCGTGGGTATCGAACAGG  "Muribaculaceae"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCTGTTAAGTCAGCGGTCAAATGTCGGGGCTCAACCCCGGCCTGCCGTTGAAACTGGCGGCCTCGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCCCGACTGACGCTGAGGCACGAAAGCGTGGGTATCGAACAGG  "Muribaculaceae"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCTTTTAAGTCAGCGGTAAAAATTCGGGGCTCAACCCCGTCCGGCCGTTGAAACTGGGGGCCTTGAGTGGGCGAGAAGAAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCCTTCCGGCGCCCTACTGACGCTGAGGCACGAAAGTGCGGGGATCGAACAGG  "Muribaculaceae"
## TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGACTGCAACTGACACTGATGCTCGAAAGTGTGGGTATCAAACAGG "Bacteroidaceae"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGCCTGCCAAGTCAGCGGTAAAATTGCGGGGCTCAACCCCGTACAGCCGTTGAAACTGCCGGGCTCGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCATACCGGCGCCCTACTGACGCTGAGGCACGAAAGTGCGGGGATCAAACAGG  "Muribaculaceae"
##                                                                                                                                                                                                                                                               Genus        
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGAAGATCAAGTCAGCGGTAAAATTGAGAGGCTCAACCTCTTCGAGCCGTTGAAACTGGTTTTCTTGAGTGAGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCTCAACTGACGCTCATGCACGAAAGTGTGGGTATCGAACAGG  NA           
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGACTCTCAAGTCAGCGGTCAAATCGCGGGGCTCAACCCCGTTCCGCCGTTGAAACTGGGAGCCTTGAGTGCGCGAGAAGTAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCCTACCGGCGCGCAACTGACGCTCATGCACGAAAGCGTGGGTATCGAACAGG  NA           
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCTGTTAAGTCAGCGGTCAAATGTCGGGGCTCAACCCCGGCCTGCCGTTGAAACTGGCGGCCTCGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCCCGACTGACGCTGAGGCACGAAAGCGTGGGTATCGAACAGG  NA           
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCTTTTAAGTCAGCGGTAAAAATTCGGGGCTCAACCCCGTCCGGCCGTTGAAACTGGGGGCCTTGAGTGGGCGAGAAGAAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCCTTCCGGCGCCCTACTGACGCTGAGGCACGAAAGTGCGGGGATCGAACAGG  NA           
## TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGACTGCAACTGACACTGATGCTCGAAAGTGTGGGTATCAAACAGG "Bacteroides"
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGCCTGCCAAGTCAGCGGTAAAATTGCGGGGCTCAACCCCGTACAGCCGTTGAAACTGCCGGGCTCGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCATACCGGCGCCCTACTGACGCTGAGGCACGAAAGTGCGGGGATCAAACAGG  NA           
##                                                                                                                                                                                                                                                               Species
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGAAGATCAAGTCAGCGGTAAAATTGAGAGGCTCAACCTCTTCGAGCCGTTGAAACTGGTTTTCTTGAGTGAGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCTCAACTGACGCTCATGCACGAAAGTGTGGGTATCGAACAGG  NA     
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGCAGGCGGACTCTCAAGTCAGCGGTCAAATCGCGGGGCTCAACCCCGTTCCGCCGTTGAAACTGGGAGCCTTGAGTGCGCGAGAAGTAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCCTACCGGCGCGCAACTGACGCTCATGCACGAAAGCGTGGGTATCGAACAGG  NA     
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCTGTTAAGTCAGCGGTCAAATGTCGGGGCTCAACCCCGGCCTGCCGTTGAAACTGGCGGCCTCGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACTCCGATTGCGAAGGCAGCATACCGGCGCCCGACTGACGCTGAGGCACGAAAGCGTGGGTATCGAACAGG  NA     
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGGCTTTTAAGTCAGCGGTAAAAATTCGGGGCTCAACCCCGTCCGGCCGTTGAAACTGGGGGCCTTGAGTGGGCGAGAAGAAGGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCCTTCCGGCGCCCTACTGACGCTGAGGCACGAAAGTGCGGGGATCGAACAGG  NA     
## TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGGTGTAGCGGTGAAATGCTTAGATATCACGAAGAACTCCGATTGCGAAGGCAGCTCACTGGACTGCAACTGACACTGATGCTCGAAAGTGTGGGTATCAAACAGG NA     
## TACGGAGGATGCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGTGCGTAGGCGGCCTGCCAAGTCAGCGGTAAAATTGCGGGGCTCAACCCCGTACAGCCGTTGAAACTGCCGGGCTCGAGTGGGCGAGAAGTATGCGGAATGCGTGGTGTAGCGGTGAAATGCATAGATATCACGCAGAACCCCGATTGCGAAGGCAGCATACCGGCGCCCTACTGACGCTGAGGCACGAAAGTGCGGGGATCAAACAGG  NA

12 Create Phyloseq Object

if (length(fnFs) > 0) {
  # Read sample metadata if it exists
  metadata_file <- config$metadata$file
  if (file.exists(metadata_file)) {
    samdf <- read.csv(metadata_file, row.names = 1)
    cat("Loaded metadata for", nrow(samdf), "samples\n")
  } else {
    cat("Warning: Metadata file not found. Creating minimal metadata.\n")
    samdf <- data.frame(
      SampleID = sample.names,
      row.names = sample.names
    )
  }
  
  # Create phyloseq object
  ps <- phyloseq(
    otu_table(seqtab.nochim, taxa_are_rows = FALSE),
    sample_data(samdf),
    tax_table(taxa)
  )
  
  cat("\nPhyloseq object summary:\n")
  print(ps)
  
  # Save phyloseq object
  if (config$output$save_intermediate) {
    saveRDS(ps, file.path(output_path, "phyloseq_object.rds"))
  }
}
## Loaded metadata for 19 samples
## 
## Phyloseq object summary:
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 217 taxa and 19 samples ]
## sample_data() Sample Data:       [ 19 samples by 3 sample variables ]
## tax_table()   Taxonomy Table:    [ 217 taxa by 7 taxonomic ranks ]

13 Quality Filtering

if (length(fnFs) > 0) {
  # Remove samples with fewer than minimum reads
  min_reads <- config$phyloseq$min_reads
  ps_filtered <- prune_samples(sample_sums(ps) >= min_reads, ps)
  
  cat("Samples before filtering:", nsamples(ps), "\n")
  cat("Samples after filtering (>= ", min_reads, " reads):", 
      nsamples(ps_filtered), "\n")
  
  # Remove low prevalence taxa
  prev_threshold <- config$phyloseq$prevalence_threshold
  prevalence <- apply(otu_table(ps_filtered), 2, function(x) sum(x > 0) / length(x))
  ps_filtered <- prune_taxa(prevalence >= prev_threshold, ps_filtered)
  
  cat("Taxa after prevalence filtering (>=", prev_threshold, "):", 
      ntaxa(ps_filtered), "\n")
  
  # Update phyloseq object
  ps <- ps_filtered
  
  # Save filtered phyloseq object
  if (config$output$save_intermediate) {
    saveRDS(ps, file.path(output_path, "phyloseq_filtered.rds"))
  }
}
## Samples before filtering: 19 
## Samples after filtering (>=  1000  reads): 19 
## Taxa after prevalence filtering (>= 0.05 ): 217

14 Basic Visualizations

14.1 Alpha Diversity

if (length(fnFs) > 0 && nsamples(ps) > 0) {
  # Plot alpha diversity
  p <- plot_richness(ps, measures = c("Shannon", "Simpson", "Chao1")) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    labs(title = "Alpha Diversity Measures")
  
  print(p)
  
  ggsave(file.path(fig_path, "alpha_diversity.png"), 
         width = 10, height = 6, dpi = 300)
}

14.2 Taxonomic Composition

if (length(fnFs) > 0 && nsamples(ps) > 0) {
  # Plot taxonomic composition at Phylum level
  ps_phylum <- tax_glom(ps, "Phylum", NArm = TRUE)
  
  p <- plot_bar(ps_phylum, fill = "Phylum") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    labs(title = "Taxonomic Composition at Phylum Level",
         x = "Sample",
         y = "Abundance")
  
  print(p)
  
  ggsave(file.path(fig_path, "taxonomic_composition_phylum.png"), 
         width = 12, height = 8, dpi = 300)
}

14.3 Ordination

if (length(fnFs) > 0 && nsamples(ps) > 1) {
  # Transform to relative abundance
  ps_rel <- transform_sample_counts(ps, function(x) x / sum(x))
  
  # Perform ordination (PCoA with Bray-Curtis)
  ord <- ordinate(ps_rel, method = "PCoA", distance = "bray")
  
  p <- plot_ordination(ps_rel, ord, type = "samples") +
    theme_bw() +
    labs(title = "PCoA Ordination (Bray-Curtis Distance)")
  
  print(p)
  
  ggsave(file.path(fig_path, "ordination_pcoa.png"), 
         width = 10, height = 8, dpi = 300)
}

15 Session Information

sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices datasets  utils     methods   base     
## 
## other attached packages:
## [1] tidyr_1.3.1     dplyr_1.1.4     yaml_2.3.10     ggplot2_4.0.1  
## [5] phyloseq_1.50.0 dada2_1.34.0    Rcpp_1.1.0     
## 
## loaded via a namespace (and not attached):
##   [1] bitops_1.0-9                deldir_2.0-4               
##   [3] permute_0.9-8               rlang_1.1.6                
##   [5] magrittr_2.0.4              ade4_1.7-23                
##   [7] matrixStats_1.5.0           compiler_4.4.2             
##   [9] mgcv_1.9-1                  systemfonts_1.3.1          
##  [11] png_0.1-8                   vctrs_0.6.5                
##  [13] reshape2_1.4.5              stringr_1.6.0              
##  [15] pwalign_1.2.0               pkgconfig_2.0.3            
##  [17] crayon_1.5.3                fastmap_1.2.0              
##  [19] XVector_0.46.0              labeling_0.4.3             
##  [21] Rsamtools_2.22.0            rmarkdown_2.30             
##  [23] UCSC.utils_1.2.0            ragg_1.5.0                 
##  [25] purrr_1.2.0                 xfun_0.54                  
##  [27] zlibbioc_1.52.0             cachem_1.1.0               
##  [29] GenomeInfoDb_1.42.3         jsonlite_2.0.0             
##  [31] biomformat_1.34.0           rhdf5filters_1.18.1        
##  [33] DelayedArray_0.32.0         Rhdf5lib_1.28.0            
##  [35] BiocParallel_1.40.2         jpeg_0.1-11                
##  [37] parallel_4.4.2              cluster_2.1.6              
##  [39] R6_2.6.1                    bslib_0.9.0                
##  [41] stringi_1.8.7               RColorBrewer_1.1-3         
##  [43] GenomicRanges_1.58.0        jquerylib_0.1.4            
##  [45] SummarizedExperiment_1.36.0 iterators_1.0.14           
##  [47] knitr_1.50                  IRanges_2.40.1             
##  [49] Matrix_1.7-1                splines_4.4.2              
##  [51] igraph_2.2.1                tidyselect_1.2.1           
##  [53] rstudioapi_0.17.1           abind_1.4-8                
##  [55] vegan_2.7-2                 codetools_0.2-20           
##  [57] hwriter_1.3.2.1             lattice_0.22-6             
##  [59] tibble_3.3.0                plyr_1.8.9                 
##  [61] Biobase_2.66.0              withr_3.0.2                
##  [63] ShortRead_1.64.0            S7_0.2.1                   
##  [65] evaluate_1.0.5              survival_3.7-0             
##  [67] RcppParallel_5.1.11-1       Biostrings_2.74.1          
##  [69] pillar_1.11.1               BiocManager_1.30.27        
##  [71] MatrixGenerics_1.18.1       renv_1.1.5                 
##  [73] foreach_1.5.2               stats4_4.4.2               
##  [75] generics_0.1.4              S4Vectors_0.44.0           
##  [77] scales_1.4.0                glue_1.8.0                 
##  [79] tools_4.4.2                 interp_1.1-6               
##  [81] data.table_1.17.8           GenomicAlignments_1.42.0   
##  [83] rhdf5_2.50.2                grid_4.4.2                 
##  [85] ape_5.8-1                   latticeExtra_0.6-31        
##  [87] nlme_3.1-166                GenomeInfoDbData_1.2.13    
##  [89] cli_3.6.5                   textshaping_1.0.4          
##  [91] S4Arrays_1.6.0              gtable_0.3.6               
##  [93] sass_0.4.10                 digest_0.6.39              
##  [95] BiocGenerics_0.52.0         SparseArray_1.6.2          
##  [97] farver_2.1.2                htmltools_0.5.8.1          
##  [99] multtest_2.62.0             lifecycle_1.0.4            
## [101] httr_1.4.7                  MASS_7.3-61

16 Summary

This pipeline has completed the following steps:

  1. ✓ Quality control and visualization of raw reads
  2. ✓ Filtering and trimming of sequences
  3. ✓ Error rate learning
  4. ✓ Sample inference using DADA2
  5. ✓ Merging of paired-end reads
  6. ✓ Chimera removal
  7. ✓ Taxonomy assignment (bacterial or fungal)
  8. ✓ Phyloseq object creation
  9. ✓ Basic diversity and composition analyses

All output files have been saved to the output/ directory.